In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.colors as colors
pio.templates.default = "plotly_white"
data = pd.read_csv("Sample - Superstore.csv", encoding='latin-1')
print(data)
Row ID Order ID Order Date Ship Date Ship Mode \
0 1 CA-2016-152156 11/8/2016 11/11/2016 Second Class
1 2 CA-2016-152156 11/8/2016 11/11/2016 Second Class
2 3 CA-2016-138688 6/12/2016 6/16/2016 Second Class
3 4 US-2015-108966 10/11/2015 10/18/2015 Standard Class
4 5 US-2015-108966 10/11/2015 10/18/2015 Standard Class
... ... ... ... ... ...
9989 9990 CA-2014-110422 1/21/2014 1/23/2014 Second Class
9990 9991 CA-2017-121258 2/26/2017 3/3/2017 Standard Class
9991 9992 CA-2017-121258 2/26/2017 3/3/2017 Standard Class
9992 9993 CA-2017-121258 2/26/2017 3/3/2017 Standard Class
9993 9994 CA-2017-119914 5/4/2017 5/9/2017 Second Class
Customer ID Customer Name Segment Country City \
0 CG-12520 Claire Gute Consumer United States Henderson
1 CG-12520 Claire Gute Consumer United States Henderson
2 DV-13045 Darrin Van Huff Corporate United States Los Angeles
3 SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale
4 SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale
... ... ... ... ... ...
9989 TB-21400 Tom Boeckenhauer Consumer United States Miami
9990 DB-13060 Dave Brooks Consumer United States Costa Mesa
9991 DB-13060 Dave Brooks Consumer United States Costa Mesa
9992 DB-13060 Dave Brooks Consumer United States Costa Mesa
9993 CC-12220 Chris Cortes Consumer United States Westminster
... Postal Code Region Product ID Category Sub-Category \
0 ... 42420 South FUR-BO-10001798 Furniture Bookcases
1 ... 42420 South FUR-CH-10000454 Furniture Chairs
2 ... 90036 West OFF-LA-10000240 Office Supplies Labels
3 ... 33311 South FUR-TA-10000577 Furniture Tables
4 ... 33311 South OFF-ST-10000760 Office Supplies Storage
... ... ... ... ... ... ...
9989 ... 33180 South FUR-FU-10001889 Furniture Furnishings
9990 ... 92627 West FUR-FU-10000747 Furniture Furnishings
9991 ... 92627 West TEC-PH-10003645 Technology Phones
9992 ... 92627 West OFF-PA-10004041 Office Supplies Paper
9993 ... 92683 West OFF-AP-10002684 Office Supplies Appliances
Product Name Sales Quantity \
0 Bush Somerset Collection Bookcase 261.9600 2
1 Hon Deluxe Fabric Upholstered Stacking Chairs,... 731.9400 3
2 Self-Adhesive Address Labels for Typewriters b... 14.6200 2
3 Bretford CR4500 Series Slim Rectangular Table 957.5775 5
4 Eldon Fold 'N Roll Cart System 22.3680 2
... ... ... ...
9989 Ultra Door Pull Handle 25.2480 3
9990 Tenex B1-RE Series Chair Mats for Low Pile Car... 91.9600 2
9991 Aastra 57i VoIP phone 258.5760 2
9992 It's Hot Message Books with Stickers, 2 3/4" x 5" 29.6000 4
9993 Acco 7-Outlet Masterpiece Power Center, Wihtou... 243.1600 2
Discount Profit
0 0.00 41.9136
1 0.00 219.5820
2 0.00 6.8714
3 0.45 -383.0310
4 0.20 2.5164
... ... ...
9989 0.20 4.1028
9990 0.00 15.6332
9991 0.20 19.3932
9992 0.00 13.3200
9993 0.00 72.9480
[9994 rows x 21 columns]
In [7]:
data.head()
Out[7]:
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Country | City | ... | Postal Code | Region | Product ID | Category | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | CA-2016-152156 | 11/8/2016 | 11/11/2016 | Second Class | CG-12520 | Claire Gute | Consumer | United States | Henderson | ... | 42420 | South | FUR-BO-10001798 | Furniture | Bookcases | Bush Somerset Collection Bookcase | 261.9600 | 2 | 0.00 | 41.9136 |
| 1 | 2 | CA-2016-152156 | 11/8/2016 | 11/11/2016 | Second Class | CG-12520 | Claire Gute | Consumer | United States | Henderson | ... | 42420 | South | FUR-CH-10000454 | Furniture | Chairs | Hon Deluxe Fabric Upholstered Stacking Chairs,... | 731.9400 | 3 | 0.00 | 219.5820 |
| 2 | 3 | CA-2016-138688 | 6/12/2016 | 6/16/2016 | Second Class | DV-13045 | Darrin Van Huff | Corporate | United States | Los Angeles | ... | 90036 | West | OFF-LA-10000240 | Office Supplies | Labels | Self-Adhesive Address Labels for Typewriters b... | 14.6200 | 2 | 0.00 | 6.8714 |
| 3 | 4 | US-2015-108966 | 10/11/2015 | 10/18/2015 | Standard Class | SO-20335 | Sean O'Donnell | Consumer | United States | Fort Lauderdale | ... | 33311 | South | FUR-TA-10000577 | Furniture | Tables | Bretford CR4500 Series Slim Rectangular Table | 957.5775 | 5 | 0.45 | -383.0310 |
| 4 | 5 | US-2015-108966 | 10/11/2015 | 10/18/2015 | Standard Class | SO-20335 | Sean O'Donnell | Consumer | United States | Fort Lauderdale | ... | 33311 | South | OFF-ST-10000760 | Office Supplies | Storage | Eldon Fold 'N Roll Cart System | 22.3680 | 2 | 0.20 | 2.5164 |
5 rows × 21 columns
In [8]:
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 9994 entries, 0 to 9993 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Row ID 9994 non-null int64 1 Order ID 9994 non-null object 2 Order Date 9994 non-null object 3 Ship Date 9994 non-null object 4 Ship Mode 9994 non-null object 5 Customer ID 9994 non-null object 6 Customer Name 9994 non-null object 7 Segment 9994 non-null object 8 Country 9994 non-null object 9 City 9994 non-null object 10 State 9994 non-null object 11 Postal Code 9994 non-null int64 12 Region 9994 non-null object 13 Product ID 9994 non-null object 14 Category 9994 non-null object 15 Sub-Category 9994 non-null object 16 Product Name 9994 non-null object 17 Sales 9994 non-null float64 18 Quantity 9994 non-null int64 19 Discount 9994 non-null float64 20 Profit 9994 non-null float64 dtypes: float64(3), int64(3), object(15) memory usage: 1.6+ MB
In [11]:
data['Order Date'] = pd.to_datetime(data['Order Date'])
data['Ship Date'] = pd.to_datetime(data['Ship Date'])
print(data['Order Date'])
0 2016-11-08
1 2016-11-08
2 2016-06-12
3 2015-10-11
4 2015-10-11
...
9989 2014-01-21
9990 2017-02-26
9991 2017-02-26
9992 2017-02-26
9993 2017-05-04
Name: Order Date, Length: 9994, dtype: datetime64[ns]
In [12]:
print(data['Ship Date'])
0 2016-11-11
1 2016-11-11
2 2016-06-16
3 2015-10-18
4 2015-10-18
...
9989 2014-01-23
9990 2017-03-03
9991 2017-03-03
9992 2017-03-03
9993 2017-05-09
Name: Ship Date, Length: 9994, dtype: datetime64[ns]
In [13]:
data['Order Month'] = data['Order Date'].dt.month
data['Order Year'] = data['Order Date'].dt.year
data['Order Day of Week'] = data['Order Date'].dt.dayofweek
print(data['Order Month'])
0 11
1 11
2 6
3 10
4 10
..
9989 1
9990 2
9991 2
9992 2
9993 5
Name: Order Month, Length: 9994, dtype: int32
In [14]:
print(data['Order Year'])
0 2016
1 2016
2 2016
3 2015
4 2015
...
9989 2014
9990 2017
9991 2017
9992 2017
9993 2017
Name: Order Year, Length: 9994, dtype: int32
In [15]:
print(data['Order Day of Week'])
0 1
1 1
2 6
3 6
4 6
..
9989 1
9990 6
9991 6
9992 6
9993 3
Name: Order Day of Week, Length: 9994, dtype: int32
In [16]:
data.head()
Out[16]:
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Country | City | ... | Category | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | Order Month | Order Year | Order Day of Week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | CA-2016-152156 | 2016-11-08 | 2016-11-11 | Second Class | CG-12520 | Claire Gute | Consumer | United States | Henderson | ... | Furniture | Bookcases | Bush Somerset Collection Bookcase | 261.9600 | 2 | 0.00 | 41.9136 | 11 | 2016 | 1 |
| 1 | 2 | CA-2016-152156 | 2016-11-08 | 2016-11-11 | Second Class | CG-12520 | Claire Gute | Consumer | United States | Henderson | ... | Furniture | Chairs | Hon Deluxe Fabric Upholstered Stacking Chairs,... | 731.9400 | 3 | 0.00 | 219.5820 | 11 | 2016 | 1 |
| 2 | 3 | CA-2016-138688 | 2016-06-12 | 2016-06-16 | Second Class | DV-13045 | Darrin Van Huff | Corporate | United States | Los Angeles | ... | Office Supplies | Labels | Self-Adhesive Address Labels for Typewriters b... | 14.6200 | 2 | 0.00 | 6.8714 | 6 | 2016 | 6 |
| 3 | 4 | US-2015-108966 | 2015-10-11 | 2015-10-18 | Standard Class | SO-20335 | Sean O'Donnell | Consumer | United States | Fort Lauderdale | ... | Furniture | Tables | Bretford CR4500 Series Slim Rectangular Table | 957.5775 | 5 | 0.45 | -383.0310 | 10 | 2015 | 6 |
| 4 | 5 | US-2015-108966 | 2015-10-11 | 2015-10-18 | Standard Class | SO-20335 | Sean O'Donnell | Consumer | United States | Fort Lauderdale | ... | Office Supplies | Storage | Eldon Fold 'N Roll Cart System | 22.3680 | 2 | 0.20 | 2.5164 | 10 | 2015 | 6 |
5 rows × 24 columns
In [17]:
sales_by_month = data.groupby('Order Month')['Sales'].sum().reset_index()
fig = px.line(sales_by_month,
x='Order Month',
y='Sales',
title='Monthly Sales Analysis')
fig.show()
In [18]:
sales_by_category = data.groupby('Category')['Sales'].sum().reset_index()
fig = px.pie(sales_by_category,
values='Sales',
names='Category',
hole=0.5,
color_discrete_sequence=px.colors.qualitative.Pastel)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title_text='Sales Analysis by Category', title_font=dict(size=24))
fig.show()
In [19]:
sales_by_subcategory = data.groupby('Sub-Category')['Sales'].sum().reset_index()
fig = px.bar(sales_by_subcategory,
x='Sub-Category',
y='Sales',
title='Sales Analysis by Sub-Category')
fig.show()
In [20]:
profit_by_month = data.groupby('Order Month')['Profit'].sum().reset_index()
fig = px.line(profit_by_month,
x='Order Month',
y='Profit',
title='Monthly Profit Analysis')
fig.show()
In [21]:
profit_by_category = data.groupby('Category')['Profit'].sum().reset_index()
fig = px.pie(profit_by_category,
values='Profit',
names='Category',
hole=0.5,
color_discrete_sequence=px.colors.qualitative.Pastel)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title_text='Profit Analysis by Category', title_font=dict(size=24))
fig.show()
In [22]:
profit_by_subcategory = data.groupby('Sub-Category')['Profit'].sum().reset_index()
fig = px.bar(profit_by_subcategory, x='Sub-Category',
y='Profit',
title='Profit Analysis by Sub-Category')
fig.show()
In [23]:
sales_profit_by_segment = data.groupby('Segment').agg({'Sales': 'sum', 'Profit': 'sum'}).reset_index()
color_palette = colors.qualitative.Pastel
fig = go.Figure()
fig.add_trace(go.Bar(x=sales_profit_by_segment['Segment'],
y=sales_profit_by_segment['Sales'],
name='Sales',
marker_color=color_palette[0]))
fig.add_trace(go.Bar(x=sales_profit_by_segment['Segment'],
y=sales_profit_by_segment['Profit'],
name='Profit',
marker_color=color_palette[1]))
fig.update_layout(title='Sales and Profit Analysis by Customer Segment',
xaxis_title='Customer Segment', yaxis_title='Amount')
fig.show()
In [25]:
sales_profit_by_segment = data.groupby('Segment').agg({'Sales': 'sum', 'Profit': 'sum'}).reset_index()
sales_profit_by_segment['Sales_to_Profit_Ratio'] = sales_profit_by_segment['Sales'] / sales_profit_by_segment['Profit']
print(sales_profit_by_segment[['Segment', 'Sales_to_Profit_Ratio']])
Segment Sales_to_Profit_Ratio 0 Consumer 8.659471 1 Corporate 7.677245 2 Home Office 7.125416
In [ ]:
In [ ]: